import os

import openai
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.retrievers import KNNRetriever
from langchain_openai import OpenAIEmbeddings
#from langchain.text_splitter import CharacterTextSplitter
from langchain_text_splitters import CharacterTextSplitter

# Credentials are read from the environment so that no secret is committed
# with the source. The key that used to be hard-coded here must be treated as
# leaked and should be revoked with the provider.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this script."
    )

# The endpoint is not a secret, so the previous value stays as the default and
# OPENAI_API_BASE only acts as an optional override.
api_base = os.environ.get("OPENAI_API_BASE") or "https://chatapi.littlewheat.com/v1"

# Embeddings client shared by every retrieval example below.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=api_key,
    openai_api_base=api_base,
)

# Embed a handful of short sample sentences, then print how many embeddings
# came back followed by the first one.
sample_sentences = [
    "Hi there!",
    "Oh, hello!",
    "What's your name?",
    "My friends call me World",
    "Hello World!",
]
embedding = embedding_model.embed_documents(sample_sentences)
print(len(embedding))
print(embedding[0])

# Load the source text file, decoding it as UTF-8.
loader = TextLoader(
    r"F:\ai\03大模型开发实战\05langchain深度解析\LangChain全面剖析之Retrieval资料\data\消失的她.txt",
    encoding="utf-8",
)
text = loader.load()

# Split the loaded documents using chunk_size=100 with no overlap, and print
# the resulting pieces.
textSplit = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
docs = textSplit.split_documents(text)
print(docs)

# Build a Chroma store from the split documents with the shared embedding model.
db = Chroma.from_documents(docs, embedding_model)

# Run a similarity search for the question against the store and print the hits.
query = "《消失的她》这部电影里主要讲的是一个什么故事？"
db_search = db.similarity_search(query)
print(db_search)

# Build a KNN retriever over a tiny in-memory corpus, query it with "foo",
# and print whatever it returns.
knn_corpus = ["foo", "bar", "world", "hello", "foo bar"]
retrieval = KNNRetriever.from_texts(knn_corpus, embedding_model)
result = retrieval.invoke("foo")
print(result)